*************************************************************
*                     SEGREGATION PROJECT                   *
* Olivier.Godechot@sciencespo.fr                            *
*************************************************************

set more off, permanently
set pagesize 10000
set matsize 10000
*ssc install outreg2 /*install outreg2 if necessary */

*Directory where you can store results and provisional files
cd "D:\Olivier\Documents\Stata"


*Counts the number of files for appending
local n=1

* Opens logfile
capture log close
log using COIN_immigration_main.log, replace


***********************************************************
* !!!! ADAPT WITH AVAILABLE YEARS !!!!!!!!!!!!!!!!!!!!!!!!*

* Adapt with your years
/*global startyear 1994 
global otheryears 1995 1996 1997 1998 1999 ///
					2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 ///  					
					2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
*/

global startyear 2010
global otheryears 2011 2012
***********************************************************

global allyears $startyear $otheryears
*global allyears $otheryears

*Other way of doing it : foreach y of numlist 1990/2010 { *
foreach y in $allyears {
	clear

	*	local y=2010
	 	 use D:\Olivier\Owncloud\Research\Work_Segregation\Coin_Shared_Programs\Example_dta\exampleb_`y'.dta 
	  * use D:\Olivier\Owncloud\Research\Work_Segregation\Coin_Shared_Programs\Example_dta\example_2010.dta 
   
	
	*Alternative if the different years are in the same file
	*keep if year==`y'
	

**********************************************************


	**********************************************************
	* !!!! CHANGE YOUR WITH YOUR VARIABLE NAMES  !!!!        *
	*For wage definition
	rename b42 wage 
	rename b32 nbhours
	rename b26 seniority
	rename b52 weight 
	*gen weight=1  /* if population data and/or representative sample withouth weight and/or no weight variable then use this */
	
	*Units variables
	rename key_b firm 
	rename key_l est 
	
	/*IF YOU DON'T HAVE FIRM ids => USE est ids for firms (and tell me so) */
	/*IF YOU DON'T HAVE EST ids => USE Firm ids for est (and tell me so), which you could eventually interact 
			with workers geographical nuts3 units */
	
	/*IMPORTANT FOR SAMPLE DATA : A numeric variable on establishment (or firm - if establishment non available) size. */
	/*If the population in your data is exhaustive within units, we will build it based on our definition of workers. So you can put the variable in comment */
	/*If you have a sample and don't have a numeric variable on unit size and that your sampling weights are frequency weights, we can build it 
		as well based on the sum of the weights. */
	rename est_size original_est_size /* This variable is to be used if establishment sample is not exhaustive per establishment and you have it. */
	
	*Geography 
	rename a13 ltown /*area (preferentially town) of residence*/
	
	* Geographical measures in which workplaces are nested
	rename a11 wtown /*area (preferentially town) of work*/
	rename a12 nuts1 /* geographical description corresponding to nuts1 approximately  ex. For France 9 regions */
	rename b421 nuts3 /* geographical description corresponding to nuts3 approximately  ex. For France 100 départements */
	
		* IF YOU DON'T HAVE WORKPLACE GEOGRAPHICAL VARIABLES  BUT ONLY INDIVIDUAL ONES: 
		*Example of how to transforman an worker level geographical variable into a workplace one
		/*
		egen estnuts3=mode(nuts3), max by(est)
		egen estnuts1=mode(nuts1), max by(est)
		egen estwtown=mode(ltown), max by(est)
		drop nuts3 nuts1 ltown

		*/	
	
	
	
	*Decomposition variables
	*Industry 
	
							/* My decomposition of nace 2 : 
							B to E : Manufacturing et al. (with Mining (B), Energy (D) and Water supply (E))
							F : Construction
							G & A: Wholesale and retail trade; repair of motor vehicles and motorcycles (G) & Agriculture (A)
							H : Transportation and storage
							I ; Accommodation and food service activities
							J & R : Information and communication (J) & Arts, entertainment and recreation (R)
							K : Financial and insurance activities
							L to M : Real estate activities (L) & Professional, scientific and technical activities (M)
							N : Administrative and support service activities
							O to Q : Public administration and defence; compulsory social security (0), 
									Education (P) & Human health and social work activities (Q)
							S to U : Other service activities (S) Activities of households as employers (T), Activities of extraterritorial organisations and bodies (U) */
	rename nace sector_agg /* aggregate version of industry 10. */
	rename b422 sector 
	
	
	* Social characterics
	rename b23 occ
	rename b25 education
	rename age age
	rename b21 gender
	rename b27 full_part_time
	rename b28 migration
	


	***********************************************************

	keep year wage nbhours seniority weight firm est wtown ltown nuts1 nuts3 sector_agg sector occ education ///
		age gender migration original_est_size full_part_time 

	************************************************************
	*!!!!         ADAPT THE CATEGORIES                     !!!!*
	* Financial center: the urban area / region / city with the main stock exchange (or with the biggest bank head quarters)
	* Financial service : finance and insurance (real estate excluded if possible)
	* Manufacturing : same definition as for Wage decomposition
	
	*OCCUPATION CATEGORIES - if available
	* occ1 : upper occupations : managers and professionals
	* occ2 : middle occupations : intermediaries jobs like technicians in industries and services
	* occ3 : lower occupations : blue collar workers and service clerks
	
	* *EDUCATION CATEGORIES - if available
	** educ1: no education /primary education
	** educ2: secondary education
	** educ3: college education /  First stage of tertiary education or Post-secondary non-tertiary education
	** educ4: master education / Second stage of tertiary education
	
	/* IF the variable is missing especially on some years, it might be easier to replace with a fake variable 
	than changing the whole program 
	gen fake_educ=rnormal(0,1)
	gen educ1=(fake_educ < -0.7)
	gen educ2=(fake_educ < 0 & fake_educ>=-0.7)
	gen educ3=(fake_educ < 0.7 & fake_educ>=0)
	gen educ4=(fake_educ>=0.7)
	*/


	* AGE categories
	* age1:  (age>14 & age<=30)
	* age2: (age>30 & age<=40)
	* age3: (age>40 & age<=55)
	* age4: (age>55 & age<=95)
	
	* Migration status => if available; otherwise ethnicity, nationality, etc.

	
	gen region9="n" if missing(wtown)==0
	gen financial_center=(wtown=="ITF")	
	
	gen sector_agg9="n" if missing(sector_agg)==0

	gen fin_service=(sector_agg=="XK")
	gen manufacturing=(sector_agg=="XO")
	
	
	
	gen educ1=(education==0 | education==1)
	gen educ2=(education==2 | education==3)
	gen educ3=(education==4 | education==5)
	gen educ4=(education==6)
	
	gen educ9="n" if educ1+educ2+educ3+educ4>0

	gen educ=1 if educ1==1
	replace educ=2 if educ2==1
	replace educ=3 if educ3==1
	replace educ=4 if educ4==1

	gen occ1=(occ>0 & occ<30)
	gen occ2=(occ>=30 & occ<=40)
	gen occ3=(occ>40 & occ<100)
	gen occ9="n" if occ1+occ2+occ3>0
	rename occ occ_or
	
	gen occ=1 if occ1==1
	replace occ=2 if occ2==1
	replace occ=3 if occ3==1


	gen age1=(age>14 & age<=30)
	gen age2=(age>30 & age<=40)
	gen age3=(age>40 & age<=55)
	gen age4=(age>55 & age<=95)
	gen age9="n" if age1+age2+age3+age4>0
	rename age age_or

/*	 *Example with another age coding 
	gen age1=(age == "10-19   " | age=="20-29   " )
	gen age2=(age=="30-39   " )
	gen age3=(age=="40-49   " )
	gen age4=(age=="50-59   " | age=="60+     ")
	gen age9="n" if missing(age)==0
	rename age age_or*/


	gen age=1 if age1==1
	replace age=2 if age2==1
	replace age=3 if age3==1
	replace age=4 if age4==1

	gen female=(gender=="F") if missing(gender)==0
	gen male=1-female
	gen female9="n" if missing(gender)==0

	gen fulltime=(full_part_time=="FT") if missing(full_part_time)==0
	gen parttime=1-fulltime
	gen fulltime9="n" if missing(full_part_time)==0
	
	gen migrant=(migration=="B") if missing(migration)==0
	gen nonmigrant=1-migrant
	gen migrant9="n" if missing(migration)==0
	
	gen lnwage=log(wage) 

	* NEW Hourly wage
	gen hwage=wage/nbhours if nbhours>0
	gen lnhwage=log(hwage) 
	* END NEW Hourly wage

	***********************************************************


	*************************************************************
	* !!!!!!!! ADAPT EARNING THRESHOLD !!!!!!!!!!!!!!!!!!!!!!!!!*
	* Earnings : earnings all workers working either full time or Part time
	* Working the full year or with at least one year of seniority
	* Selection threshold : 1/ half a yearly minimum wage 
	* OR 2/ half lowest decile FULL TIME , FULL YEAR (if possible or if not seniority>=1 year) wage
	
	 *WITH MINIMUM WAGE : 
	 * Example with French hourly minimum wage : 		
	 /* gen hminwage=6.83 if year==2002
	 	replace hminwage=7.19 if year==2003
		* ....
		replace hminwage=8.86 if year==2010
		replace hminwage=9.19 if year==2011
		
		*/ 

	/* Redefine selection threshold according to your data / 1600 ==> number of hours a year at 35 hours*/		
	*gen threshold=0.5*hminwage*1600 
	
	*WITHOUT MINIMUM WAGE : 
	egen p10 = pctile(wage) if seniority>=1 & seniority!=. & fulltime==1, p(10)
	egen threshold= mean(0.5*p10)
	
	
	*NEW 2018_03_14 : statistics on inclusion / exclusion
	* Error corrected for missings 2018-07-19
	gen included=(wage>threshold & wage !=. & weight>0 & weight !=. & seniority>=1)*1
	statsby mean_wage=r(mean) sd_wage=r(sd) N_wage=r(N) sum_w_wage=r(sum_w) ///
	min_wage=r(min) p10_wage=r(p10) p25_wage=r(p25) p50_wage=r(p50) p75_wage=r(p75)  ///
	p90_wage=r(p90) p95_wage=r(p95) p99_wage=r(p99) max_wage=r(max) year=`y' ///
	[aweight=weight], by(included) total subsets saving (deswage_`y',replace):summarize wage, detail
	*END NEW
	
	************************************************************
	
	keep if included==1
	drop migration gender seniority education age_or occ_or threshold included

	set seed 12099098
	gen random=runiform()
	rename weight original_weight
	

	
	************************************************************
	* Exposure per UNIT                                        *
	************************************************************
	* !!!! ADAPT UNITS !!!!                                    *
	* UNIT : est (establishment), firm, wtown (town of work),  *
	* ltown (town of residence)
	
	foreach u in est firm /*wtown*/ ltown {
*		local u="est"

		bysort `u': egen nbuny=sum(1)
		* Error corrected for missings 2018-07-19
		gen weight=original_weight if nbuny>1 & missing(`u')==0
		bysort `u': egen countuny=sum(weight)
		gen nottoosmall=(weight !=. & missing(`u')==0)

		sort nottoosmall wage random
		egen sweight=sum(weight)
		gen cum_weight=sum(weight)
		gen rkn=cum_weight/sweight
		drop sweight cum_weight
		
		
		bysort `u': egen lnwage_mu=mean(lnwage)
		gen lnwage_within=lnwage-lnwage_mu

		
		*NEW hourly wage
		sort nottoosmall hwage random
		egen sweighth=sum(weight)
		gen cum_weighth=sum(weight)
		gen rknh=cum_weighth/sweighth
		drop sweighth cum_weighth

		bysort `u': egen lnhwage_mu=mean(lnhwage)
		gen lnhwage_within=lnhwage-lnhwage_mu
		* END of NEW hourly wage

			
	
		************************************************************
		* !!!! ADAPT ESTABLISHMENT SIZE !!!! *
		************************************************************
		* if establishment sample is exhaustive per establishment and you have 
		* a variable on establishment size, build orgsize as follows. Otherwise, 
		* you should use a preexisting original_orgsize variable on firm size 
		* - if available.			
		
		if "`u'" == "est" {
			/* SAMPLE DATA*/
			gen nbwkrs=original_est_size
			
			/* POPULATION DATA*/
			*gen nbwkrs=countuny

			************************************************************
			
			gen orgsize=1 if nbwkrs>1 & nbwkrs<=20
			replace orgsize=2 if nbwkrs>20 & nbwkrs<=50
			replace orgsize=3 if nbwkrs>50 & nbwkrs<=100
			replace orgsize=4 if nbwkrs>100 & nbwkrs<=200
			replace orgsize=5 if nbwkrs>200 & nbwkrs<=500
			replace orgsize=6 if nbwkrs>500 & nbwkrs<=1000
			replace orgsize=7 if nbwkrs>1000 & nbwkrs<=2500
			replace orgsize=8 if nbwkrs>2500 & nbwkrs<=5000
			replace orgsize=9 if nbwkrs>5000 & nbwkrs !=.
			gen orgsize9="n" if missing(orgsize)==0
		}
		
		
		* Wage Decile exposure 
		gen d0=(rkn>0 & rkn<=0.1)
		gen d1=(rkn>0.1 & rkn<=0.2)
		gen d2=(rkn>0.2 & rkn<=0.3)
		gen d3=(rkn>0.3 & rkn<=0.4)
		gen d4=(rkn>0.4 & rkn<=0.5)		
		gen d5=(rkn>0.5 & rkn<=0.6)
		gen d6=(rkn>0.6 & rkn<=0.7)
		gen d7=(rkn>0.7 & rkn<=0.8)		
		gen d8=(rkn>0.8 & rkn<=0.9)		
		gen d9=(rkn>0.9 & rkn<=1)		
	
		gen d=0 if (rkn>0 & rkn<=0.1)
		replace d=1 if (rkn>0.1 & rkn<=0.2)
		replace d=2 if (rkn>0.2 & rkn<=0.3)
		replace d=3 if (rkn>0.3 & rkn<=0.4)
		replace d=4 if (rkn>0.4 & rkn<=0.5)		
		replace d=5 if (rkn>0.5 & rkn<=0.6)
		replace d=6 if (rkn>0.6 & rkn<=0.7)
		replace d=7 if (rkn>0.7 & rkn<=0.8)		
		replace d=8 if (rkn>0.8 & rkn<=0.9)		
		replace d=9 if (rkn>0.9 & rkn<=1)		
		gen d_9="n" if missing(d)==0 

		
		bysort `u': egen d0uny=sum(d0*weight)
		bysort `u': egen d1uny=sum(d1*weight)
		bysort `u': egen d2uny=sum(d2*weight)
		bysort `u': egen d3uny=sum(d3*weight)
		bysort `u': egen d4uny=sum(d4*weight)
		bysort `u': egen d5uny=sum(d5*weight)
		bysort `u': egen d6uny=sum(d6*weight)
		bysort `u': egen d7uny=sum(d7*weight)
		bysort `u': egen d8uny=sum(d8*weight)
		bysort `u': egen d9uny=sum(d9*weight)		

		gen xd0=(d0uny-d0*weight)/(countuny-weight)
		gen xd1=(d1uny-d1*weight)/(countuny-weight)
		gen xd2=(d2uny-d2*weight)/(countuny-weight)
		gen xd3=(d3uny-d3*weight)/(countuny-weight)
		gen xd4=(d4uny-d4*weight)/(countuny-weight)
		gen xd5=(d5uny-d5*weight)/(countuny-weight)
		gen xd6=(d6uny-d6*weight)/(countuny-weight)
		gen xd7=(d7uny-d7*weight)/(countuny-weight)
		gen xd8=(d8uny-d8*weight)/(countuny-weight)
		gen xd9=(d9uny-d9*weight)/(countuny-weight)

		drop d0uny-d9uny

		tabstat xd0-xd9 [aweight=weight] if nottoosmall==1, by(d)

		foreach i of numlist 0/9 {
			statsby mean_xd`i'=r(mean) sd_xd`i'=r(sd) N_xd`i'=r(N) sum_w_xd`i'=r(sum_w) year=`y' ///
			if nottoosmall==1 & d_9=="n" [aweight=weight], by(d) total saving(dxd`i'_`u'_`y',replace):summarize xd`i'
		}
				
	
		drop d d0-d9 xd0-xd9 d_9

		* NEW
		* Hourly Wage exposure
		gen F0025h=(rknh>0 & rknh<=0.25)
		gen F2575h=(rknh>0.25 & rknh<=0.75)
		gen F7590h=(rknh>0.75 & rknh<=0.90)
		gen F9099h=(rknh>0.90 & rknh<=0.99)
		gen F9910h=(rknh>0.99 & rknh<=1)

		gen fhwage=1 if F0025h==1
		replace fhwage=2 if F2575h==1
		replace fhwage=3 if F7590h==1
		replace fhwage=4 if F9099h==1
		replace fhwage=5 if F9910h==1
		gen fhwage9="n" if missing(fhwage)==0

		bysort `u': egen F0025unyh=sum(F0025h*weight)
		bysort `u': egen F2575unyh=sum(F2575h*weight)
		bysort `u': egen F7590unyh=sum(F7590h*weight)
		bysort `u': egen F9099unyh=sum(F9099h*weight)
		bysort `u': egen F9910unyh=sum(F9910h*weight)

		gen xF0025h=(F0025unyh-F0025h*weight)/(countuny-weight)
		gen xF2575h=(F2575unyh-F2575h*weight)/(countuny-weight)
		gen xF7590h=(F7590unyh-F7590h*weight)/(countuny-weight)
		gen xF9099h=(F9099unyh-F9099h*weight)/(countuny-weight)
		gen xF9910h=(F9910unyh-F9910h*weight)/(countuny-weight)
		
		drop rknh F0025unyh-F9910unyh

		tabstat xF0025h xF2575h xF7590h xF9099h xF9910h [aweight=weight] if nottoosmall==1, by(fhwage)

		foreach i in xF0025h xF2575h xF7590h xF9099h xF9910h hwage lnhwage lnhwage_within  {
			statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' ///
			if nottoosmall==1 & fhwage9=="n" [aweight=weight], by(fhwage) total saving(fwage`i'_`u'_`y',replace):summarize `i'
		}

		drop fhwage lnhwage_mu lnhwage_within xF0025h-xF9910h F0025h-F9910h fhwage fhwage9 
		* END OF NEW

		
		* Wage exposure
		gen F0025=(rkn>0 & rkn<=0.25)
		gen F2575=(rkn>0.25 & rkn<=0.75)
		gen F7590=(rkn>0.75 & rkn<=0.90)
		gen F9099=(rkn>0.90 & rkn<=0.99)
		gen F9910=(rkn>0.99 & rkn<=1)
		gen F9010=F9099+F9910

		gen fwage=1 if F0025==1
		replace fwage=2 if F2575==1
		replace fwage=3 if F7590==1
		replace fwage=4 if F9099==1
		replace fwage=5 if F9910==1
		gen fwage9="n" if missing(fwage)==0

		bysort `u': egen F0025uny=sum(F0025*weight)
		bysort `u': egen F2575uny=sum(F2575*weight)
		bysort `u': egen F7590uny=sum(F7590*weight)
		bysort `u': egen F9099uny=sum(F9099*weight)
		bysort `u': egen F9910uny=sum(F9910*weight)
		bysort `u': egen F9010uny=sum(F9010*weight)
		

		gen xF0025=(F0025uny-F0025*weight)/(countuny-weight)
		gen xF2575=(F2575uny-F2575*weight)/(countuny-weight)
		gen xF7590=(F7590uny-F7590*weight)/(countuny-weight)
		gen xF9099=(F9099uny-F9099*weight)/(countuny-weight)
		gen xF9910=(F9910uny-F9910*weight)/(countuny-weight)
		gen xF9010=(F9010uny-F9010*weight)/(countuny-weight)
		gen xF0075=xF0025+xF2575
		gen invcountuny=1/countuny
		
		* NEW 2021 11 08
		* EXPORT YEAR ESTIMATES FOR REGRESSIONS
		if "`u'" == "est" {

		save temp/temp.dta, replace	
		
		sort est	
		keep year est nbwkrs wtown nuts1 nuts3 sector sector_agg nbuny weight orgsize ///
				F0025uny F2575uny F7590uny F9099uny F9910uny F9010uny ///
				F0025 F2575 F7590 F9099 F9910 F9010 xF9010 xF9910 xF0025 xF0075 lnwage_mu
				
			rename F0025uny f0025_w
			rename F2575uny f2575_w
			rename F7590uny f7590_w 
			rename F9099uny f9099_w
			rename F9910uny f9910_w
			rename F9010uny f9010_w
			
			rename F0025 f0025
			rename F2575 f2575
			rename F7590 f7590			
			rename F9099 f9099
			rename F9910 f9910
			rename F9010 f9010
			
			gen f0075=f0025+f2575
			
			rename nbuny nb_sample
			gen f9010xf9010_sw=xF9010*weight if f9010==1
			gen f9010xf0025_sw=xF0025*weight if f9010==1
			gen f9010xf0075_sw=xF0075*weight if f9010==1
			
			gen f9910xf9910_sw=xF9910*weight if f9910==1
			gen f9910xf0025_sw=xF0025*weight if f9910==1
			gen f9910xf0075_sw=xF0075*weight if f9910==1
			
			gen f0025xf0025_sw=xF0025*weight if f0025==1
			gen f0075xf0075_sw=xF0075*weight if f0075==1
			
			gen lnwage_mu_sw=lnwage_mu*weight

		
		collapse (firstnm) year nbwkrs wtown nuts1 nuts3 sector sector_agg nb_sample orgsize f0025_w f2575_w f7590_w f9099_w f9910_w f9010_w ///
				(sum) f0025 f2575 f7590 f9099 f9910 f9010 weight f9010xf9010_sw f9010xf0025_sw f9010xf0075_sw ///
						f9910xf9910_sw f9910xf0025_sw f9910xf0075_sw f0025xf0025_sw f0075xf0075_sw lnwage_mu_sw, by(est) 
		rename weight s_weight
		save temp/est`y'.dta,replace
				
		use temp/temp.dta, clear
		rm temp/temp.dta
		}
		* END NEW



		drop rkn F0025uny-F9910uny F9010uny

		tabstat xF0025 xF2575 xF7590 xF9099 xF9910 [aweight=weight] if nottoosmall==1, by(fwage)
		tabstat xF9010 xF9099 xF9910 [aweight=weight] if nottoosmall==1, by(F9010)

		*NEW 2018_03_14 : min and max
		foreach i in xF0025 xF2575 xF7590 xF9099 xF9910 wage lnwage lnwage_within countuny invcountuny  {
			statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) min_`i'=r(min) max_`i'=r(max) year=`y' ///
			if nottoosmall==1 & fwage9=="n" [aweight=weight], by(fwage) total saving(fwage`i'_`u'_`y',replace):summarize `i'
		}


		
	
		if "`u'" == "est" {
		
	  *NEW 2018_07_25 : keep missing sectors and regions in the computation to avoid bizarre estimates of fractiles.
			
			
			foreach i in xF0025 xF2575 xF7590 xF9099 xF9910  wage lnwage lnwage_within countuny invcountuny {

				statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' ///
				if nottoosmall==1 & fwage9=="n" [aweight=weight], by(fwage financial_center) total subsets saving (fwage_gc`i'_`u'_`y',replace):summarize `i'

				statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' ///
				if nottoosmall==1 & fwage9=="n" [aweight=weight], by(fwage fin_service) total subsets saving(fwage_fin`i'_`u'_`y',replace):summarize `i'

				statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' ///
				if nottoosmall==1 & fwage9=="n" [aweight=weight], by(fwage manufacturing) total subsets saving(fwage_man`i'_`u'_`y',replace):summarize `i'

				statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' ///
				if nottoosmall==1 & fwage9=="n" & orgsize9=="n" [aweight=weight], by(fwage orgsize) total subsets saving(fwage_size`i'_`u'_`y',replace):summarize `i'
								
				* NEW 2021 11 08
				statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' ///
				if nottoosmall==1 & fwage9=="n" [aweight=weight], by(fwage sector_agg) total subsets saving(fwage_sector_agg`i'_`u'_`y',replace):summarize `i'
				
				statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' ///
				if nottoosmall==1 & fwage9=="n" [aweight=weight], by(fwage nuts1) total subsets saving(fwage_nuts1`i'_`u'_`y',replace):summarize `i'			
				* END NEW

				}
		

		
		}

		if "`u'" == "est" | "`u'" == "ltown" {
				gen xF9910_`u'=xF9910 
				gen xF0025_`u'=xF0025
				gen fwage_`u'=fwage
					}
		
		drop lnwage_mu lnwage_within invcountuny xF0025-xF9910 xF9010 xF0075 F0025-F9910 F9010 fwage fwage9	
		
		* Education exposure
		bysort `u': egen educ1uny=sum(educ1*weight)
		bysort `u': egen educ2uny=sum(educ2*weight)
		bysort `u': egen educ3uny=sum(educ3*weight)
		bysort `u': egen educ4uny=sum(educ4*weight)
		replace countuny=educ1uny+educ2uny+educ3uny+educ4uny

		gen xeduc1=(educ1uny-educ1*weight)/(countuny-weight)
		gen xeduc2=(educ2uny-educ2*weight)/(countuny-weight)
		gen xeduc3=(educ3uny-educ3*weight)/(countuny-weight)
		gen xeduc4=(educ4uny-educ4*weight)/(countuny-weight)
		

		drop educ1uny-educ4uny

		tabstat xeduc1 xeduc2 xeduc3 xeduc4 [aweight=weight] if nottoosmall==1 & educ9=="n", by(educ)

		foreach i in xeduc1 xeduc2 xeduc3 xeduc4  {
			statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' if nottoosmall==1 & educ9=="n" [aweight=weight], by(educ) total saving(educ`i'_`u'_`y',replace):summarize `i'
		}				
		
	
		if "`u'" == "est" | "`u'" == "ltown" {
				gen xeduc4_`u'=xeduc4 
				gen xeduc1_`u'=xeduc1
				gen educ_`u'=educ
					}
			
		drop xeduc1-xeduc4 

		* Occupation exposure 
		bysort `u': egen occ1uny=sum(occ1*weight)
		bysort `u': egen occ2uny=sum(occ2*weight)
		bysort `u': egen occ3uny=sum(occ3*weight)

		replace countuny=occ1uny+occ2uny+occ3uny
		gen xocc1=(occ1uny-occ1*weight)/(countuny-weight)
		gen xocc2=(occ2uny-occ2*weight)/(countuny-weight)
		gen xocc3=(occ3uny-occ3*weight)/(countuny-weight)
		
		drop occ1uny-occ3uny

		tabstat xocc1 xocc2 xocc3 [aweight=weight] if nottoosmall==1 & occ9=="n", by(occ)
		
		foreach i in xocc1 xocc2 xocc3 {
			statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' if nottoosmall==1 & occ9=="n" [aweight=weight], by(occ) total saving(occ`i'_`u'_`y',replace):summarize `i'
		}	

		
		
		drop xocc1-xocc3 

		* Age exposure
		bysort `u': egen age1uny=sum(age1*weight)
		bysort `u': egen age2uny=sum(age2*weight)
		bysort `u': egen age3uny=sum(age3*weight)
		bysort `u': egen age4uny=sum(age4*weight)
		replace countuny=age1uny+age2uny+age3uny+age4uny
		
		gen xage1=(age1uny-age1*weight)/(countuny-weight)
		gen xage2=(age2uny-age2*weight)/(countuny-weight)
		gen xage3=(age3uny-age3*weight)/(countuny-weight)
		gen xage4=(age4uny-age4*weight)/(countuny-weight)
		
		drop age1uny-age4uny

		tabstat xage1 xage2 xage3 xage4 [aweight=weight] if nottoosmall==1 & age9=="n", by(age)

		foreach i in xage1 xage2 xage3 xage4  {
			statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' if nottoosmall==1 & age9=="n" [aweight=weight], by(age) total saving(age`i'_`u'_`y',replace):summarize `i'
		}				


		
		drop xage1 xage2 xage3 xage4

		* Gender exposure
		bysort `u': egen femaleuny=sum(female*weight)
		bysort `u': egen maleuny=sum(male*weight)
		replace countuny=femaleuny+maleuny

		gen xfemale=(femaleuny-female*weight)/(countuny-weight)
		gen xmale=(maleuny-male*weight)/(countuny-weight)

		drop femaleuny maleuny

		tabstat xfemale xmale  [aweight=weight] if nottoosmall==1 & female9=="n", by(female)

		foreach i in xfemale xmale    {
			statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' if nottoosmall==1 & female9=="n" [aweight=weight], by(female) total saving(female`i'_`u'_`y',replace):summarize `i'
		}				

		
		drop xfemale xmale

		* Migrant exposure 
		bysort `u': egen migrantuny=sum(migrant*weight)
		bysort `u': egen nonmigrantuny=sum(nonmigrant*weight)
		replace countuny=migrantuny+nonmigrantuny

		gen xmigrant=(migrantuny-migrant*weight)/(countuny-weight)
		gen xnonmigrant=(nonmigrantuny-nonmigrant*weight)/(countuny-weight)

		drop migrantuny nonmigrantuny 

		tabstat xmigrant xnonmigrant [aweight=weight] if nottoosmall==1 & migrant9=="n", by(migrant)
		
		
		foreach i in xmigrant xnonmigrant {
			statsby mean_`i'=r(mean) sd_`i'=r(sd) N_`i'=r(N) sum_w_`i'=r(sum_w) year=`y' if nottoosmall==1 & migrant9=="n" [aweight=weight], by(migrant) total saving(migrant`i'_`u'_`y',replace):summarize `i'
		}
		


		drop xmigrant xnonmigrant 

		drop nbuny weight countuny nottoosmall
		
	
		
	}

************************************************************		
*Files management                                          *
************************************************************		
	foreach u in est firm /*wtown*/ ltown {
		clear
		use dxd0_`u'_`y'
		foreach i of numlist  1/9 {
				merge 1:1 _n using dxd`i'_`u'_`y'.dta  
				drop _merge
				rm dxd`i'_`u'_`y'.dta  
						}
						
		save d_`u'_`y',replace
		rm dxd0_`u'_`y'.dta

		clear
		use fwagexF0025_`u'_`y'
		foreach i in xF2575 xF7590 xF9099 xF9910 wage lnwage lnwage_within countuny invcountuny {
				merge 1:1 _n using fwage`i'_`u'_`y'.dta  
				drop _merge
				rm fwage`i'_`u'_`y'.dta  
						}
						
		save fwage_`u'_`y',replace
		rm fwagexF0025_`u'_`y'.dta

		* NEW
		clear
		use fwagexF0025h_`u'_`y'
		foreach i in xF2575h xF7590h xF9099h xF9910h hwage lnhwage lnhwage_within {
				merge 1:1 _n using fwage`i'_`u'_`y'.dta  
				drop _merge
				rm fwage`i'_`u'_`y'.dta  
						}
						
		save fhwage_`u'_`y',replace
		rm fwagexF0025h_`u'_`y'.dta
		*END of NEW
		
		clear
		use educxeduc1_`u'_`y'
		foreach i of numlist  2/4 {
				merge 1:1 _n using educxeduc`i'_`u'_`y'.dta  
				drop _merge
				rm educxeduc`i'_`u'_`y'.dta  
						}
						
		save educ_`u'_`y',replace
		rm educxeduc1_`u'_`y'.dta

	
		clear
		use occxocc1_`u'_`y'
		foreach i of numlist  2/3 {
				merge 1:1 _n using occxocc`i'_`u'_`y'.dta  
				drop _merge
				rm occxocc`i'_`u'_`y'.dta  
						}
						
		save occ_`u'_`y',replace
		rm occxocc1_`u'_`y'.dta		

		
		clear
		use agexage1_`u'_`y'
		foreach i of numlist  2/4 {
				merge 1:1 _n using agexage`i'_`u'_`y'.dta  
				drop _merge
				rm agexage`i'_`u'_`y'.dta  
						}
						
		save age_`u'_`y',replace
		rm agexage1_`u'_`y'.dta		
		

		clear
		use femalexfemale_`u'_`y'
		merge 1:1 _n using femalexmale_`u'_`y'.dta  
		drop _merge
		rm femalexmale_`u'_`y'.dta  				
		save female_`u'_`y',replace
		rm femalexfemale_`u'_`y'.dta  						
				
		clear
		use migrantxmigrant_`u'_`y'
		merge 1:1 _n using migrantxnonmigrant_`u'_`y'.dta  
		drop _merge
		rm migrantxnonmigrant_`u'_`y'.dta  				
		save migrant_`u'_`y',replace
		rm migrantxmigrant_`u'_`y'.dta  						
			
			if "`u'" == "est" {
				foreach c in fin gc man size nuts1 sector_agg {
						clear
						use fwage_`c'xF0025_est_`y'
						foreach i in xF2575 xF7590 xF9099 xF9910 wage lnwage lnwage_within countuny invcountuny  {
								merge 1:1 _n using fwage_`c'`i'_est_`y'.dta  
								drop _merge
								rm fwage_`c'`i'_est_`y'.dta  
								}							
						save fwage_`c'_est_`y',replace
						rm fwage_`c'xF0025_est_`y'.dta

					}

				clear
			
		}

	}

	foreach u in est firm /*wtown*/ ltown {
		foreach f in d_`u' fhwage_`u' fwage_`u' educ_`u' occ_`u' age_`u' female_`u' migrant_`u'   {
			clear
			if `n'==1 {
				use `f'_`y'
				save "`f'.dta", replace
				}
			else {
				use `f'
				append using `f'_`y'
				save "`f'.dta", replace
				}
			clear	
			rm `f'_`y'.dta					
				
		}
	}

	
	
	
*NEW 2018_03_14: deswage	
	foreach f in deswage  {
		clear
		if `n'==1 {
			use `f'_`y'
			save "`f'.dta", replace
			}
		else {
			use `f'
			append using `f'_`y'
			save "`f'.dta", replace
			}
		clear
		rm `f'_`y'.dta

	}

		foreach c in fin gc man size nuts1 sector_agg {
		foreach f in fwage_`c'_est  {
			clear
			if `n'==1 {
				use `f'_`y'
				save "`f'.dta", replace
				}
			else {
				use `f'
				append using `f'_`y'
				save "`f'.dta", replace
				}
		clear
		rm `f'_`y'.dta
		}
	}
	local n=`n'+1		
}


**************************************************************************
* THANK YOU ! THANK YOU ! THANK YOU ! THANK YOU ! THANK YOU ! THANK YOU !*
**************************************************************************
